This is the third script in the processing pipeline for IMC data.
The goal is to generate cell segmentation masks using the Mesmer segmentation deep learning model via the deepcell library. See the steinbock online documentation on deepcell segmentation for details.
For cell segmentation, channels corresponding to nuclear and to membrane-specific marker(s) are subset to generate two-channel image stacks that are used as the input to the Mesmer application. After applying segmentation, the quality of the masks is verified visually.
Post-processing parameters can be adjusted to improve the segmentation.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
import sys
from cv2 import addWeighted
from deepcell.applications import Mesmer
from matplotlib.colors import ListedColormap
from skimage import color, exposure
from skimage.segmentation import expand_labels, mark_boundaries
from tqdm import tqdm
2023-02-24 23:17:36.442279: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/lib/python3.9/site-packages/cv2/../../lib64: 2023-02-24 23:17:36.442351: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
from steinbock import io
from steinbock.segmentation import deepcell
# Report the interpreter environment (module search path and executable)
# to document which Python/conda setup ran this notebook.
for env_info in (sys.path, sys.executable):
    print(env_info)
['/home/ubuntu/bbvolume/Git/T1D_preprocessing/processing', '/opt/conda/lib/python39.zip', '/opt/conda/lib/python3.9', '/opt/conda/lib/python3.9/lib-dynload', '', '/opt/conda/lib/python3.9/site-packages'] /opt/conda/bin/python
Paths to input and output folders as well as antibody panels were exported by the first script (01_Preprocessing.ipynb). Here they are imported again.
# Restore the dictionary of input/output folder paths that was exported
# by the first script (01_Preprocessing.ipynb).
folders_file = Path("./variables/folders.txt")
with folders_file.open("rb") as handle:
    folders = pickle.load(handle)
folders
{'raw': PosixPath('/home/ubuntu/Data3/processing/raw'),
'img': PosixPath('/home/ubuntu/Data3/processing/img'),
'seg_cells': PosixPath('/home/ubuntu/Data3/processing/seg_cells'),
'seg_islets': PosixPath('/home/ubuntu/Data3/processing/seg_islets'),
'masks_cells': PosixPath('/home/ubuntu/Data3/processing/masks_cells'),
'masks_islets': PosixPath('/home/ubuntu/Data3/processing/masks_islets'),
'data_cells': PosixPath('/home/ubuntu/Data3/processing/data_cells'),
'data_islets': PosixPath('/home/ubuntu/Data3/processing/data_islets'),
'data': PosixPath('/home/ubuntu/Data3/processing'),
'git': PosixPath('/home/ubuntu/bbvolume/Git/T1D_preprocessing/processing')}
# Restore the antibody panels exported by 01_Preprocessing.ipynb and
# preview the first rows of each panel.
panels_file = Path("./variables/panels.txt")
with panels_file.open("rb") as handle:
    panels = pickle.load(handle)

for panel_name, panel in panels.items():
    print(panel_name, "\n", panel.head())

panel_names = list(panels.keys())
Islet
channel metal name antibody_clone keep isletseg deepcell \
0 0 Y89 Ghrelin 883622 1 NaN NaN
1 1 In113 Histone H3 D1H2 1 NaN 1.0
2 2 In115 Biotin 1D4-C5 1 NaN NaN
6 6 La139 Somatostatin ICDCLS 1 NaN NaN
7 7 Ce140 Insulin 19H4L12 1 NaN NaN
dimred clustering short_name
0 1.0 1.0 GHRL
1 0.0 0.0 H3
2 1.0 0.0 HBP
6 1.0 1.0 SST
7 1.0 1.0 INS
Immune
channel metal name antibody_clone keep isletseg deepcell \
0 0 Y89 MPO pAb 1 NaN NaN
1 1 In113 Histone H3 D1H2 1 NaN 1.0
2 2 In115 SMA 1A4 1 NaN NaN
6 6 La139 Somatostatin ICDCLS 1 NaN NaN
7 7 Ce140 Insulin 19H4L12 1 NaN NaN
dimred clustering short_name
0 1 1 MPO
1 0 0 H3
2 1 1 SMA
6 1 1 SST
7 1 1 INS
Segmentation stacks are generated by aggregating the channels selected in panel.csv in the column deepcell.
Cell segmentation requires constructing 2-channel images with the following structure:
For channel-wise normalization, zscore and min-max methods are available.
In addition, different functions can be used to aggregate channels. Default: np.mean, for other options, see https://numpy.org/doc/stable/reference/routines.statistics.html#averages-and-variances.
# Define image preprocessing options.
# Channel-wise normalization: either z-score or min-max scaling can be
# applied before aggregation; here only min-max is enabled.
channelwise_zscore = False
channelwise_minmax = True
# Function used to aggregate the channels within each group; np.sum adds
# the selected markers together (np.mean is the steinbock default).
aggr_func = np.sum

for panel_name, panel in panels.items():
    print("Processing", panel_name, "panel")

    # Define channels to use for segmentation ("deepcell" column in panel(s)).
    # Zeros are mapped to NaN so they are excluded from the stacks, the same
    # way unannotated (NaN) channels are.
    channel_groups = panel["deepcell"].values
    channel_groups = np.where(channel_groups == 0, np.nan, channel_groups)

    # Define input/output folders
    img_subdir = folders["img"] / panel_name
    seg_subdir = folders["seg_cells"] / panel_name
    seg_subdir.mkdir(exist_ok=True)

    # Create and save one two-channel segmentation stack per image
    for img_path in io.list_image_files(img_subdir):
        segstack = deepcell.create_segmentation_stack(
            img=io.read_image(img_path),
            channelwise_minmax=channelwise_minmax,
            channelwise_zscore=channelwise_zscore,
            channel_groups=channel_groups,
            aggr_func=aggr_func,
        )
        # Keep the original file name so images and stacks stay paired.
        segstack_file = seg_subdir / img_path.name
        io.write_image(segstack, segstack_file)
Processing Islet panel Processing Immune panel
# Visual QC of the segmentation stacks: display the nuclear and membrane
# channels of randomly chosen images, side by side for both panels.

# Number of images per panel to show
nb_images_to_show = 5

# Adjust image max intensity if needed (lower value = higher intensity)
max_intensity_nuc = 0.75
max_intensity_mem = 0.5

# Randomly select images. Sampling is done on the first panel's folder;
# the matching image of the other panel is located below by substituting
# the panel name inside the file name.
segstacks_dir0 = folders["seg_cells"] / panel_names[0]
segstacks = sorted(segstacks_dir0.glob("*.tiff"))
rng = np.random.default_rng()
indexes = rng.choice(len(segstacks), nb_images_to_show, replace=False)

# Plot: 4 columns laid out as (panel0 nuclei, panel1 nuclei,
# panel0 membrane, panel1 membrane).
# NOTE(review): columns j and j+2 with j in {0, 1} assume exactly two
# panels — this indexing breaks for any other panel count.
fig, axs = plt.subplots(nb_images_to_show, 4,
                        figsize=(16, 4*nb_images_to_show))
for i,idx in enumerate(indexes):
    for j, (panel_name, panel) in enumerate(panels.items()):
        ## load images and masks
        seg_subdir = folders["seg_cells"] / panel_name
        # Swap the panel name embedded in the sampled file name — assumes
        # file names contain the panel name (TODO confirm naming scheme).
        img_name = segstacks[idx].name.replace(panel_names[0], panel_name)
        img = io.read_image(seg_subdir / img_name)
        ## plot images: channel 0 = nuclei, channel 1 = membrane
        axs[i,j].imshow(img[0,:,:], vmin=0, vmax=max_intensity_nuc)
        axs[i,j].set_title(img_name + ": nuclei")
        axs[i,j].axis('off')
        axs[i,j+2].imshow(img[1,:,:], vmin=0, vmax=max_intensity_mem)
        axs[i,j+2].set_title(img_name + ": membrane")
        axs[i,j+2].axis('off')
segmentation_type should be either whole-cell or nuclear.
The image resolution should also be specified (microns per pixel).
Several post-processing arguments can be passed to the deepcell application. Defaults for nuclear and whole-cell segmentation are indicated in brackets.
maxima_threshold: set lower if cells are missing (default for nuclear segmentation=0.1, default for whole-cell segmentation=0.075). maxima_smooth: (default=0). interior_threshold: set higher if your nuclei are too large (default=0.2). interior_smooth: larger values give rounder cells (default=2). small_objects_threshold: depends on the image resolution (default=50). fill_holes_threshold: (default=10). radius: (default=2). Cell labels can also be expanded by defining an expansion_distance (mostly useful for nuclear segmentation).
# Configuration for Mesmer segmentation. Both kwargs dicts are always
# passed to app.predict below; `segmentation_type` selects which one the
# application actually applies.

# Segmentation type: "whole-cell" or "nuclear"
segmentation_type = "whole-cell" # "nuclear"

# Post-processing arguments for whole-cell segmentation
kwargs_whole_cell = {
    'maxima_threshold': 0.075,      # set lower if cells are missing
    'maxima_smooth': 0,
    'interior_threshold': 0.2,      # set higher if cells are too large
    'interior_smooth': 2,           # larger values give rounder cells
    'small_objects_threshold': 25,  # depends on image resolution (default=50)
    'fill_holes_threshold': 15,
    'radius': 2
}

# Post-processing arguments for nuclear segmentation
kwargs_nuclear = {
    'maxima_threshold': 0.1,        # set lower if nuclei are missing
    'maxima_smooth': 0,
    'interior_threshold': 0.2,      # set higher if nuclei are too large
    'interior_smooth': 2,           # larger values give rounder nuclei
    'small_objects_threshold': 15,  # depends on image resolution (default=50)
    'fill_holes_threshold': 15,
    'radius': 2
}

# Mask pixel expansion (0 = no expansion)
expansion_distance = 0

# Image resolution (microns per pixel)
image_mpp = 1

# Instantiate the Mesmer deep-learning application (downloads/loads the
# pretrained model; emits TensorFlow warnings on CPU-only machines)
app = Mesmer()
2023-02-25 00:57:13.770955: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/lib/python3.9/site-packages/cv2/../../lib64: 2023-02-25 00:57:13.771320: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) 2023-02-25 00:57:13.771468: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (0b7302342a6c): /proc/driver/nvidia/version does not exist 2023-02-25 00:57:13.800887: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
# Run Mesmer on every segmentation stack of every panel and write one
# labeled cell mask per image.
for panel_name, panel in panels.items():
    # Input/output
    print("Processing", panel_name, "panel")
    seg_subdir = folders["seg_cells"] / panel_name
    masks_dir = folders["masks_cells"] / panel_name
    masks_dir.mkdir(exist_ok = True)
    # Masks for each compartment ("whole-cell"/"nuclear") get a subfolder
    masks_subdir = masks_dir / segmentation_type
    masks_subdir.mkdir(exist_ok = True)

    segstacks = io.list_image_files(seg_subdir)
    for stack in tqdm(segstacks):
        # Load images and reorder from channels-first to channels-last,
        # then add a leading batch axis (presumably the (batch, y, x,
        # channels) layout the Mesmer application expects — TODO confirm).
        img = io.read_image(stack)
        img = np.moveaxis(img, 0, 2)
        # NOTE(review): `.data` extracts the underlying buffer/array of
        # whatever io.read_image returns — verify it is needed here.
        img = np.expand_dims(img.data, 0)
        # Predict masks; only the kwargs matching `segmentation_type`
        # are applied by the application
        mask = app.predict(
            img, image_mpp = image_mpp, compartment = segmentation_type,
            postprocess_kwargs_whole_cell = kwargs_whole_cell,
            postprocess_kwargs_nuclear = kwargs_nuclear
        )
        # Drop the batch axis, then optionally grow each label outward by
        # `expansion_distance` pixels (no-op when the distance is 0)
        mask = mask.squeeze()
        mask = expand_labels(mask, distance = float(expansion_distance))
        # Save masks under the same file name as the input stack
        mask_file = masks_subdir / stack.name
        io.write_mask(mask, mask_file)
Processing Islet panel
0%| | 0/7650 [00:00<?, ?it/s]2023-02-25 00:57:29.486000: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 67108864 exceeds 10% of free system memory. 2023-02-25 00:57:29.497481: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 67108864 exceeds 10% of free system memory. 2023-02-25 00:57:29.504807: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 67108864 exceeds 10% of free system memory. 2023-02-25 00:57:29.527365: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 67108864 exceeds 10% of free system memory. 2023-02-25 00:57:29.573272: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 134217728 exceeds 10% of free system memory. /opt/conda/lib/python3.9/site-packages/deepcell_toolbox/deep_watershed.py:179: FutureWarning: `selem` is a deprecated argument name for `h_maxima`. It will be removed in version 1.0. Please use `footprint` instead. markers = h_maxima(image=maxima, 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7650/7650 [5:28:52<00:00, 2.58s/it]
Processing Immune panel
58%|████████████████████████████████████████████████████████████████████████▉ | 4467/7650 [4:48:20<3:53:02, 4.39s/it]
The nuclear channel, cell mask and overlay images and labels are shown side-by-side. Full-size images are shown on odd rows and zoom-ins are shown on even rows.
Adjust the image intensity and overlaid mask transparency by adjusting the corresponding variables.
For higher magnification images, adjust the coordinates and dimension if needed.
Overlays can be saved (in subfolders of the mask_cells directory).
# Visual QC of the cell masks: for each panel, overlay randomly chosen
# masks on the corresponding image (full view + zoom-in per image).

# Number of images to show (per panel)
nb_images_to_show = 5

# adjust image max intensity if needed (lower value = higher intensity)
max_intensity = 0.75
overlay_alpha = 0.25

# Color palette for masks: 1000 random colors, with label 0 (background)
# forced to white
cmap = ListedColormap(np.random.rand(10**3,3))
cmap.colors[0] = [1,1,1]

# Should overlay images be saved?
save_overlay = False

# Two rows (full image + zoom-in) per displayed image
total_images = nb_images_to_show * len(panels.keys())
fig, ax = plt.subplots(2*total_images, 3,
                       figsize=(12, 8*total_images))
for i, panel_name in enumerate(panel_names):
    # List masks and images (masks and stacks share file names)
    masks_subdir = folders["masks_cells"] / panel_name / segmentation_type
    seg_subdir = folders["seg_cells"] / panel_name
    masks = io.list_mask_files(masks_subdir)
    for j in range(nb_images_to_show):
        # Row offset: images of panel i start after panel i-1's rows
        k = 2 * (j + (i * nb_images_to_show))
        # Load random images and masks, create an overlay
        rng = np.random.default_rng()
        ix = rng.choice(len(masks))
        cur_filename = masks[ix].name
        img = io.read_image(seg_subdir / cur_filename)
        # Scale to [0, 1] by the global max and keep the first channel
        cur_img = (img / np.amax(img))[0,...]
        mask = io.read_image(masks_subdir / cur_filename)
        cur_mask = mask[0,...].astype('uint16')
        # print(cur_img.shape, cur_mask.shape)
        # Overlay image and cell labels (sigmoid contrast enhancement
        # first, so faint structures remain visible under the overlay)
        cur_img = exposure.adjust_sigmoid(cur_img, 0.1)
        # NOTE(review): gray2rgb(...)[...,0] converts to RGB and
        # immediately takes one channel back — verify this round trip
        # (dtype/scaling side effect?) is intentional.
        cur_img = color.gray2rgb(cur_img)[...,0]
        overlay1 = color.label2rgb(cur_mask, cur_img,
                                   alpha = overlay_alpha, bg_label=0)
        # Overlay image and mask borders via weighted blending
        # (cv2.addWeighted: 1*img*255 + 1*borders*65535 + 0)
        borders = mark_boundaries(cur_img, cur_mask, mode = "thin")[...,0]
        overlay2 = addWeighted(cur_img*255, 1, borders*65535, 1, 0)
        # Display images and masks (row k: full-size views)
        ax[k,0].imshow(img[0,:,:], vmax = max_intensity)
        ax[k,0].set_title(cur_filename)
        ax[k,1].imshow(overlay1, cmap = cmap)
        ax[k,1].set_title("Overlay")
        ax[k,1].axis('off')
        ax[k,2].imshow(overlay2)
        ax[k,2].set_title("Borders")
        ax[k,2].axis('off')
        # Higher magnification (change coordinates and dimensions if needed)
        xstart = 100
        ystart = 100
        dim = 100
        # Row k+1: same images cropped to a dim x dim window via axis limits
        ax[k+1,0].imshow(img[0,:,:], vmin = 0, vmax = max_intensity)
        ax[k+1,0].set_xlim([xstart, xstart + dim])
        ax[k+1,0].set_ylim([ystart, ystart + dim])
        ax[k+1,1].imshow(overlay1, cmap = cmap)
        ax[k+1,1].set_xlim([xstart, xstart + dim])
        ax[k+1,1].set_ylim([ystart, ystart + dim])
        ax[k+1,1].axis('off')
        ax[k+1,2].imshow(overlay2)
        ax[k+1,2].set_xlim([xstart, xstart + dim])
        ax[k+1,2].set_ylim([ystart, ystart + dim])
        ax[k+1,2].axis('off')
        # Save the overlay; moveaxis puts channels first (presumably the
        # layout io.write_image expects — TODO confirm)
        if save_overlay:
            overlay_dir = folders["masks_cells"] / panel_name / "overlay"
            overlay_dir.mkdir(exist_ok = True)
            overlay_file = overlay_dir / masks[ix].name
            io.write_image(np.moveaxis(overlay1, -1, 0), overlay_file)